In [1]:
import sys
import os
import time
import numpy as np
import pandas as pd
import umap

# Show which checkout NOVA_HOME points at, then put that directory on the
# import path (position 1) so the `src.*` imports below can be resolved from it.
# NOTE(review): os.getenv returns None when NOVA_HOME is unset, in which case
# None is what gets inserted into sys.path — confirm the variable is always exported.
print('NOVA_HOME is at', os.getenv('NOVA_HOME'))
sys.path.insert(1, os.getenv('NOVA_HOME'))
%load_ext autoreload
%autoreload 2

# Project-local helpers from the `src` package.
from src.common.utils import load_config_file
from src.embeddings.embeddings_utils import load_embeddings
# NOTE(review): `sns` and `plt` are used further down without an explicit import
# in this cell; presumably they arrive through this star import — confirm, and
# prefer explicit imports so the dependency is visible.
from src.figures.distances_plotting import *
from src.analysis.analyzer_distances_utils import summarize_times, merge_batches_by_key, correlate_columns
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA

UMAP2¶

In [2]:
# Folder that contains one sub-directory of distance-statistics CSVs per batch.
dist_folder = (
    '/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/'
    'finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/'
    'figures/neuronsDay8_new/distances'
)

Batch 9 (with SNCA)¶

In [3]:
# Batch 9 (folder name includes SNCA): read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch9_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_SNCA_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
In [4]:
# Preview the first rows of the table loaded into df2.
df2.head()
Out[4]:
label1 label2 block_size total_pairs dist_time_s stats_time_s p5 p10 p25 p50 p75 p90 p95 lower_whisker upper_whisker
0 FUSHeterozygous FUSHeterozygous 963 463203 0.052585 0.042789 0.522287 0.532076 0.548703 0.567466 0.586266 0.603136 0.613112 0.492359 0.642610
1 FUSHeterozygous FUSHomozygous 963 596097 0.000782 0.000843 0.534047 0.543667 0.559954 0.578516 0.597462 0.614746 0.625169 0.503693 0.653724
2 FUSHeterozygous FUSRevertant 963 1249974 0.001365 0.001773 0.563255 0.572589 0.588347 0.606197 0.624242 0.640746 0.650700 0.534505 0.678085
3 FUSHeterozygous OPTN 963 1527318 0.001012 0.001835 0.559709 0.569534 0.586190 0.604718 0.623413 0.640459 0.650847 0.530355 0.679247
4 FUSHeterozygous SNCA 963 1144044 0.000643 0.001587 0.563962 0.573173 0.588802 0.606559 0.624620 0.641323 0.651342 0.535074 0.678347
In [5]:
# Box plot of the distance statistics held in df2 (presumably one box per label pair — confirm in distances_plotting).
plot_custom_boxplot(df2)
In [6]:
# Histogram view of the distances in df2 (helper semantics not visible here — see distances_plotting).
plot_dist_histogram(df2)
In [7]:
# The highlight threshold is the largest p50 among rows whose two labels are identical.
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
In [8]:
## Optional: stretch the scale for visualization by min-max rescaling `p50`.
## Disabled on purpose: enabling it overwrites df2['p50'] in place, so every later
## cell that reads df2 would see the rescaled values.
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
In [9]:
# The threshold passed to the network plot is the 0.9 quantile of p50 over all rows of df2.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)

Batch 1¶

In [17]:
# Batch 1: read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch1_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[17]:
label1 label2 p50
22 TBK1 TBK1 0.539661
7 FUSHomozygous FUSHomozygous 0.545753
25 TDP43 TDP43 0.546862
23 TBK1 TDP43 0.547113
0 FUSHeterozygous FUSHeterozygous 0.553231
19 OPTN TBK1 0.553576
18 OPTN OPTN 0.554861
27 WT WT 0.557363
26 TDP43 WT 0.559161
20 OPTN TDP43 0.560376
24 TBK1 WT 0.560482
1 FUSHeterozygous FUSHomozygous 0.561633
13 FUSRevertant FUSRevertant 0.565079
10 FUSHomozygous TBK1 0.571328
21 OPTN WT 0.571863
11 FUSHomozygous TDP43 0.574052
14 FUSRevertant OPTN 0.574406
15 FUSRevertant TBK1 0.577892
4 FUSHeterozygous TBK1 0.578370
3 FUSHeterozygous OPTN 0.580904
5 FUSHeterozygous TDP43 0.583487
9 FUSHomozygous OPTN 0.584027
16 FUSRevertant TDP43 0.585284
12 FUSHomozygous WT 0.587608
2 FUSHeterozygous FUSRevertant 0.588762
17 FUSRevertant WT 0.591656
6 FUSHeterozygous WT 0.591661
8 FUSHomozygous FUSRevertant 0.608589
In [18]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 1).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Batch 2¶

In [19]:
# Batch 2: read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch2_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[19]:
label1 label2 p50
7 FUSHomozygous FUSHomozygous 0.546936
0 FUSHeterozygous FUSHeterozygous 0.548499
22 TBK1 TBK1 0.548950
1 FUSHeterozygous FUSHomozygous 0.555975
23 TBK1 TDP43 0.562944
27 WT WT 0.563282
25 TDP43 TDP43 0.563887
24 TBK1 WT 0.567673
15 FUSRevertant TBK1 0.569236
13 FUSRevertant FUSRevertant 0.569552
18 OPTN OPTN 0.569705
19 OPTN TBK1 0.570390
26 TDP43 WT 0.572139
4 FUSHeterozygous TBK1 0.574397
21 OPTN WT 0.575518
10 FUSHomozygous TBK1 0.577372
16 FUSRevertant TDP43 0.579854
20 OPTN TDP43 0.580374
3 FUSHeterozygous OPTN 0.581884
11 FUSHomozygous TDP43 0.583449
17 FUSRevertant WT 0.583861
6 FUSHeterozygous WT 0.584608
5 FUSHeterozygous TDP43 0.585057
9 FUSHomozygous OPTN 0.586790
2 FUSHeterozygous FUSRevertant 0.587176
14 FUSRevertant OPTN 0.588517
12 FUSHomozygous WT 0.590300
8 FUSHomozygous FUSRevertant 0.593408
In [20]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 2).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Batch 3¶

In [21]:
# Batch 3: read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch3_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[21]:
label1 label2 p50
7 FUSHomozygous FUSHomozygous 0.536293
25 TDP43 TDP43 0.547416
22 TBK1 TBK1 0.549955
1 FUSHeterozygous FUSHomozygous 0.553319
0 FUSHeterozygous FUSHeterozygous 0.554651
13 FUSRevertant FUSRevertant 0.560835
18 OPTN OPTN 0.561831
20 OPTN TDP43 0.562299
19 OPTN TBK1 0.562334
23 TBK1 TDP43 0.562578
27 WT WT 0.568445
15 FUSRevertant TBK1 0.568820
26 TDP43 WT 0.570272
16 FUSRevertant TDP43 0.573265
24 TBK1 WT 0.573452
11 FUSHomozygous TDP43 0.573738
10 FUSHomozygous TBK1 0.574295
21 OPTN WT 0.574835
14 FUSRevertant OPTN 0.575962
4 FUSHeterozygous TBK1 0.578538
9 FUSHomozygous OPTN 0.578937
12 FUSHomozygous WT 0.583245
8 FUSHomozygous FUSRevertant 0.585845
3 FUSHeterozygous OPTN 0.587613
17 FUSRevertant WT 0.587810
5 FUSHeterozygous TDP43 0.588240
2 FUSHeterozygous FUSRevertant 0.588509
6 FUSHeterozygous WT 0.591562
In [22]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 3).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Batch 7 (with SNCA)¶

In [23]:
# Batch 7 (folder name includes SNCA): read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch7_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_SNCA_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[23]:
label1 label2 p50
26 SNCA SNCA 0.590263
8 FUSHomozygous FUSHomozygous 0.592641
0 FUSHeterozygous FUSHeterozygous 0.597765
35 WT WT 0.602324
1 FUSHeterozygous FUSHomozygous 0.602382
33 TDP43 TDP43 0.605519
11 FUSHomozygous SNCA 0.606795
15 FUSRevertant FUSRevertant 0.612042
17 FUSRevertant SNCA 0.616061
14 FUSHomozygous WT 0.616914
27 SNCA TBK1 0.616984
9 FUSHomozygous FUSRevertant 0.617293
4 FUSHeterozygous SNCA 0.617327
10 FUSHomozygous OPTN 0.619508
20 FUSRevertant WT 0.619564
19 FUSRevertant TDP43 0.619578
30 TBK1 TBK1 0.619816
34 TDP43 WT 0.620989
18 FUSRevertant TBK1 0.621272
13 FUSHomozygous TDP43 0.622004
16 FUSRevertant OPTN 0.622823
12 FUSHomozygous TBK1 0.622991
25 OPTN WT 0.624109
29 SNCA WT 0.624229
31 TBK1 TDP43 0.624381
28 SNCA TDP43 0.624533
2 FUSHeterozygous FUSRevertant 0.624797
23 OPTN TBK1 0.626227
7 FUSHeterozygous WT 0.626431
24 OPTN TDP43 0.626550
32 TBK1 WT 0.628361
22 OPTN SNCA 0.628681
6 FUSHeterozygous TDP43 0.632302
5 FUSHeterozygous TBK1 0.632824
21 OPTN OPTN 0.635142
3 FUSHeterozygous OPTN 0.636716
In [24]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 7).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Batch 8 (with SNCA)¶

In [25]:
# Batch 8 (folder name includes SNCA): read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch8_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_SNCA_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[25]:
label1 label2 p50
26 SNCA SNCA 0.562314
0 FUSHeterozygous FUSHeterozygous 0.565484
8 FUSHomozygous FUSHomozygous 0.565594
1 FUSHeterozygous FUSHomozygous 0.575056
15 FUSRevertant FUSRevertant 0.577881
17 FUSRevertant SNCA 0.581271
35 WT WT 0.582747
33 TDP43 TDP43 0.589061
19 FUSRevertant TDP43 0.592446
21 OPTN OPTN 0.592797
16 FUSRevertant OPTN 0.593564
28 SNCA TDP43 0.594863
29 SNCA WT 0.594963
24 OPTN TDP43 0.595967
30 TBK1 TBK1 0.598290
22 OPTN SNCA 0.598613
20 FUSRevertant WT 0.599341
34 TDP43 WT 0.601126
18 FUSRevertant TBK1 0.601983
4 FUSHeterozygous SNCA 0.602158
31 TBK1 TDP43 0.603401
11 FUSHomozygous SNCA 0.604468
2 FUSHeterozygous FUSRevertant 0.605318
23 OPTN TBK1 0.605512
9 FUSHomozygous FUSRevertant 0.606204
32 TBK1 WT 0.606550
25 OPTN WT 0.606767
14 FUSHomozygous WT 0.609112
6 FUSHeterozygous TDP43 0.609151
27 SNCA TBK1 0.610576
13 FUSHomozygous TDP43 0.611968
7 FUSHeterozygous WT 0.612699
3 FUSHeterozygous OPTN 0.614839
10 FUSHomozygous OPTN 0.617135
12 FUSHomozygous TBK1 0.621817
5 FUSHeterozygous TBK1 0.622719
In [26]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 8).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Batch 10¶

In [27]:
# Batch 10: read the detailed multiplexed distance statistics.
df2 = pd.read_csv(
    f'{dist_folder}/'
    'batch10_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed_multiplexed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
# Show the label pairs ordered by ascending p50.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
Out[27]:
label1 label2 p50
7 FUSHomozygous FUSHomozygous 0.566417
27 WT WT 0.566874
25 TDP43 TDP43 0.572795
26 TDP43 WT 0.576132
18 OPTN OPTN 0.579129
20 OPTN TDP43 0.584910
0 FUSHeterozygous FUSHeterozygous 0.587293
21 OPTN WT 0.589179
22 TBK1 TBK1 0.599869
13 FUSRevertant FUSRevertant 0.599920
19 OPTN TBK1 0.603438
23 TBK1 TDP43 0.606968
1 FUSHeterozygous FUSHomozygous 0.609432
11 FUSHomozygous TDP43 0.609702
12 FUSHomozygous WT 0.611286
9 FUSHomozygous OPTN 0.611436
15 FUSRevertant TBK1 0.615793
24 TBK1 WT 0.616532
14 FUSRevertant OPTN 0.617504
10 FUSHomozygous TBK1 0.619092
2 FUSHeterozygous FUSRevertant 0.623455
16 FUSRevertant TDP43 0.631512
8 FUSHomozygous FUSRevertant 0.631727
4 FUSHeterozygous TBK1 0.635872
17 FUSRevertant WT 0.641997
3 FUSHeterozygous OPTN 0.645154
5 FUSHeterozygous TDP43 0.660190
6 FUSHeterozygous WT 0.670141
In [28]:
# Box plot, label clustermap and proximity network for the table currently in df2 (batch 10).
plot_custom_boxplot(df2)
# Optional min-max stretch of p50 for visualization (kept disabled):
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    # largest p50 among rows whose two labels are identical
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),  # 0.9 quantile of p50 over all rows
    figsize=(7, 7),
)

Correlations between batches (without SNCA)¶

In [86]:
# Load the multiplexed distance table (folder variant without SNCA) for every
# batch in 1..9 that exists on disk, then merge the per-batch tables.
batches = range(1,10)

dfs, names = [], []
for b in batches:
    path = (
        f'{dist_folder}/'
        f'batch{b}_all_reps_WT_TDP43_OPTN_TBK1_FUSRevertant_FUSHeterozygous_FUSHomozygous_Untreated_without_CD41/distances_stats_euclidean_detailed_multiplexed.csv'
    )
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file means "this batch was not computed". Any other
        # failure (malformed CSV, permission problem, ...) must surface instead
        # of being reported as a missing batch, which a bare `except:` would do.
        print('batch', b, 'doesnt exists')
        continue
    # strip the "_WT_Untreated" (or everything after first "_")
    for col in ("label1","label2"):
        df[col] = df[col].str.split(pat='_', n=1).str[0]
    dfs.append(df)
    names.append(f"batch{b}")
merged_df, cols = merge_batches_by_key(dfs, names)
batch 4 doesnt exists
batch 5 doesnt exists
batch 6 doesnt exists
In [87]:
# 1) heatmap of correlations
# Spearman variant: correlate_columns returns a correlation table and a p-value
# table, each of which is drawn as its own heatmap below.
# NOTE(review): presumably `cols` holds the per-batch columns being compared — confirm
# against merge_batches_by_key.

method = 'spearman'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
In [88]:
# Same two heatmaps as the previous cell, with method='pearson' instead of 'spearman'.
method = 'pearson'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
In [89]:
# 2) pairplot of the p50 columns
# One `p50_<batch name>` column per loaded batch is selected from the merged table.
sns.pairplot(
    merged_df.loc[:, [f"p50_{batch_name}" for batch_name in names]]
)
plt.suptitle(
    "p50 distributions & relationships",
    y=1.02,
)
plt.show()

Batches 1,2,3,10 are correlated (I removed batch 10 from analysis)¶

Batches 7,8,9 are correlated¶

Compare distances across batches¶

In [90]:
# Box plot over the merged multi-batch table (presumably one box per label pair across batches — confirm in distances_plotting).
plot_boxplot_all_pairs(merged_df)
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
In [91]:
# Plot p50 across batches from the merged table (helper semantics not visible here — see distances_plotting).
plot_p50_across_batches(merged_df)
In [ ]:
 

UMAP1¶

Batch 9¶

In [51]:
# Batch 9, WT untreated: read the detailed distance statistics.
df = pd.read_csv(
    f'{dist_folder}/'
    'batch9_all_reps_WT_Untreated_without_CD41/'
    'distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels that occur in either column, in order of first appearance.
labels = pd.concat([df['label1'], df['label2']]).unique()
In [52]:
# Preview the first rows of the table loaded into df.
df.head()
Out[52]:
label1 label2 block_size total_pairs dist_time_s stats_time_s p5 p10 p25 p50 p75 p90 p95 lower_whisker upper_whisker
0 ANXA11 ANXA11 3071 4713985 0.187604 0.097533 0.393329 0.429983 0.499990 0.591702 0.697070 0.798619 0.859730 0.204370 0.992690
1 ANXA11 CLTC 3071 19565341 0.000926 0.496859 0.679525 0.707355 0.757179 0.817852 0.881633 0.939887 0.975436 0.570497 1.068314
2 ANXA11 Calreticulin 3071 20403724 0.000952 0.536098 0.794656 0.821079 0.863632 0.909104 0.953718 0.995312 1.022292 0.728503 1.088847
3 ANXA11 DAPI 3071 193595840 0.006809 4.802346 0.965059 0.987980 1.024955 1.067759 1.113970 1.157091 1.182280 0.891431 1.247494
4 ANXA11 DCP1A 3071 20965717 0.001709 0.541207 0.729927 0.766814 0.826225 0.886376 0.944993 1.001384 1.038418 0.648073 1.123144

Box plot per label¶

In [53]:
# One box plot per marker, restricted to the rows (label pairs) that involve that marker.
for label in np.unique(labels):
    print('Marker:', label)
    # Exact equality on purpose: str.contains() performs regex *substring*
    # matching, so a marker whose name is contained in another marker's name
    # (or that contains regex metacharacters) would select unrelated pairs.
    plot_custom_boxplot(
        df[(df['label1'] == label) | (df['label2'] == label)]
    )
Marker: ANXA11
Marker: CLTC
Marker: Calreticulin
Marker: DAPI
Marker: DCP1A
Marker: FMRP
Marker: FUS
Marker: G3BP1
Marker: GM130
Marker: HNRNPA1
Marker: KIF5A
Marker: LAMP1
Marker: LSM14A
Marker: NCL
Marker: NEMO
Marker: NONO
Marker: PEX14
Marker: PML
Marker: PSD95
Marker: PURA
Marker: Phalloidin
Marker: SNCA
Marker: SON
Marker: SQSTM1
Marker: TDP43
Marker: TIA1
Marker: TOMM20
Marker: Tubulin
Marker: mitotracker
In [54]:
# The 20 rows with the smallest p50, reduced to the label pair and its p50.
df.nsmallest(n=20, columns='p50').loc[:, ['label1', 'label2', 'p50']]
Out[54]:
label1 label2 p50
299 NCL NCL 0.373354
407 SON SON 0.379395
84 DAPI DAPI 0.418604
57 Calreticulin Calreticulin 0.483635
225 HNRNPA1 HNRNPA1 0.487893
429 TOMM20 TOMM20 0.501046
135 FMRP FMRP 0.503806
330 NONO NONO 0.504985
369 PSD95 PSD95 0.505048
390 Phalloidin Phalloidin 0.505826
432 Tubulin Tubulin 0.506859
182 G3BP1 G3BP1 0.508532
414 SQSTM1 SQSTM1 0.509300
425 TIA1 TIA1 0.511997
434 mitotracker mitotracker 0.512869
418 SQSTM1 Tubulin 0.518675
315 NEMO NEMO 0.522225
420 TDP43 TDP43 0.538871
357 PML PML 0.539121
159 FUS FUS 0.545235

Nearest clusters¶

In [55]:
# Among rows whose two labels differ, the 10 with the smallest p50.
(
    df[df['label1'].ne(df['label2'])]
    .nsmallest(n=10, columns='p50')
    .loc[:, ['label1', 'label2', 'p50']]
)
Out[55]:
label1 label2 p50
418 SQSTM1 Tubulin 0.518675
393 Phalloidin SQSTM1 0.593089
83 Calreticulin mitotracker 0.598340
397 Phalloidin Tubulin 0.601287
308 NCL SON 0.610600
396 Phalloidin TOMM20 0.635211
379 PSD95 mitotracker 0.635387
194 G3BP1 PURA 0.635628
137 FMRP G3BP1 0.636537
48 CLTC Phalloidin 0.636593

Tightest clusters¶

In [56]:
# Among rows whose two labels are identical, up to 30 with the smallest p50.
(
    df[df['label1'].eq(df['label2'])]
    .nsmallest(n=30, columns='p50')
    .loc[:, ['label1', 'label2', 'p50']]
)
Out[56]:
label1 label2 p50
299 NCL NCL 0.373354
407 SON SON 0.379395
84 DAPI DAPI 0.418604
57 Calreticulin Calreticulin 0.483635
225 HNRNPA1 HNRNPA1 0.487893
429 TOMM20 TOMM20 0.501046
135 FMRP FMRP 0.503806
330 NONO NONO 0.504985
369 PSD95 PSD95 0.505048
390 Phalloidin Phalloidin 0.505826
432 Tubulin Tubulin 0.506859
182 G3BP1 G3BP1 0.508532
414 SQSTM1 SQSTM1 0.509300
425 TIA1 TIA1 0.511997
434 mitotracker mitotracker 0.512869
315 NEMO NEMO 0.522225
420 TDP43 TDP43 0.538871
357 PML PML 0.539121
159 FUS FUS 0.545235
29 CLTC CLTC 0.550498
245 KIF5A KIF5A 0.553527
204 GM130 GM130 0.562313
0 ANXA11 ANXA11 0.591702
380 PURA PURA 0.610373
344 PEX14 PEX14 0.610649
110 DCP1A DCP1A 0.611440
264 LAMP1 LAMP1 0.628887
399 SNCA SNCA 0.652723
282 LSM14A LSM14A 0.695335

Most distant clusters¶

In [57]:
# Among rows whose two labels differ, the 10 with the largest p50.
(
    df[df['label1'].ne(df['label2'])]
    .nlargest(n=10, columns='p50')
    .loc[:, ['label1', 'label2', 'p50']]
)
Out[57]:
label1 label2 p50
108 DAPI Tubulin 1.163382
104 DAPI SQSTM1 1.144320
101 DAPI Phalloidin 1.143468
31 CLTC DAPI 1.112972
323 NEMO SON 1.103250
95 DAPI NEMO 1.094177
300 NCL NEMO 1.093336
67 Calreticulin LSM14A 1.091403
187 G3BP1 LSM14A 1.089859
288 LSM14A PSD95 1.089667
In [58]:
# Histogram view of the distances in df (helper semantics not visible here — see distances_plotting).
plot_dist_histogram(df)
In [59]:
# Label clustermap for df with a fixed, hand-picked highlight threshold of 0.8
# (unlike the data-derived threshold used for the df2 clustermaps).
plot_label_clustermap(df, figsize=(9,9), highlight_thresh = 0.8) ## Set threshold to highlight
In [60]:
# Proximity network on the p50 metric with top_k=200
# (presumably keeps the 200 closest pairs — confirm in distances_plotting).
plot_cluster_proximity_network(df, metric='p50', top_k=200, )

Reproducibility across batches¶

Load all available batches distances (umap1)¶

In [81]:
# Load the WT-untreated distance table for every batch in 1..9 that exists on
# disk, then merge the per-batch tables.
# NOTE(review): the loop rebinds `df`, so after this cell `df` is the last batch
# that loaded rather than the batch-9 table read earlier; the name is kept
# unchanged here in case later cells rely on it.
batches = range(1,10)

dfs, names = [], []
for b in batches:
    path = (
        f'{dist_folder}/batch{b}_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
    )
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file means "this batch was not computed". Any other
        # failure (malformed CSV, permission problem, ...) must surface instead
        # of being reported as a missing batch, which a bare `except:` would do.
        print('batch', b, 'doesnt exists')
        continue
    # strip the "_WT_Untreated" (or everything after first "_")
    for col in ("label1","label2"):
        df[col] = df[col].str.split(pat='_', n=1).str[0]
    dfs.append(df)
    names.append(f"batch{b}")
merged_df, cols = merge_batches_by_key(dfs, names)
batch 4 doesnt exists
batch 5 doesnt exists
batch 6 doesnt exists
In [82]:
# Spearman variant: correlate_columns returns a correlation table and a p-value
# table, each of which is drawn as its own heatmap below.
method = 'spearman'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
In [83]:
# Same two heatmaps as the previous cell, with method='pearson' instead of 'spearman'.
method = 'pearson'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
In [84]:
# 2) pairplot of the p50 columns
# One `p50_<batch name>` column per loaded batch is selected from the merged table.
sns.pairplot(
    merged_df.loc[:, [f"p50_{batch_name}" for batch_name in names]]
)
plt.suptitle(
    "p50 distributions & relationships",
    y=1.02,
)
plt.show()
In [85]:
# One all-batches box plot per marker, using the marker list in `labels`
# (built from the batch-9 table) to select rows of the merged table.
for label in np.unique(labels):
    print('Marker:', label)
    # Exact equality on purpose: str.contains() performs regex *substring*
    # matching, so a marker whose name is contained in another marker's name
    # (or that contains regex metacharacters) would select unrelated pairs.
    plot_boxplot_all_pairs(
        merged_df[(merged_df['label1'] == label) | (merged_df['label2'] == label)]
    )
Marker: ANXA11
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: CLTC
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: Calreticulin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: DAPI
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: DCP1A
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: FMRP
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: FUS
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: G3BP1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: GM130
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: HNRNPA1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: KIF5A
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: LAMP1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: LSM14A
Marker: NCL
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: NEMO
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: NONO
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: PEX14
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: PML
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: PSD95
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: PURA
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: Phalloidin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
Marker: SNCA
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: SON
Marker: SQSTM1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: TDP43
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: TIA1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: TOMM20
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: Tubulin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([
Marker: mitotracker
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:446: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/src/figures/distances_plotting.py:454: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_xticklabels([

The distances are correlated between batches¶

In [ ]:
 

Batch 1¶

In [65]:
# Batch 1: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch1_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [66]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)

Batch 2¶

In [67]:
# Batch 2: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch2_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [68]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)

Batch 3¶

In [69]:
# Batch 3: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch3_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [70]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)

Batch 7¶

In [71]:
# Batch 7: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch7_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [72]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)

Batch 8¶

In [73]:
# Batch 8: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch8_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [74]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)

Batch 10¶

In [75]:
# Batch 10: read the label-pair distance statistics table (Euclidean, per the file name).
df = pd.read_csv(
    f'{dist_folder}/batch10_all_reps_WT_Untreated_without_CD41/distances_stats_euclidean_detailed.csv'
)
# Keep only the text before the first underscore in each label column.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
# Distinct labels found on either side of a pair, in order of first appearance.
labels = pd.unique(pd.concat([df['label1'], df['label2']]))
In [76]:
# Clustermap of the label-pair table; the highlight threshold is the largest
# p50 value among rows whose two labels are identical.
plot_label_clustermap(
    df,
    figsize=(6, 6),
    highlight_thresh=df.loc[df['label1'] == df['label2'], 'p50'].max(),
)
# Proximity network; the threshold is the 0.9 quantile of the p50 column.
# NOTE(review): how each helper applies its threshold is defined in
# src.figures.distances_plotting — confirm there.
plot_cluster_proximity_network(
    df,
    threshold=df['p50'].quantile(q=0.9),
    figsize=(7, 7),
)
In [ ]: